# Package setup ----
# Use pacman::p_load() for installing/loading.
# - packages to load are stored in a character vector
pkgs <- c(
  "tidyverse", "glue", "scales", "lubridate", "patchwork", "ggh4x",
  "ggrepel", "openintro", "ggridges", "dsbox", "janitor", "here",
  "knitr", "ggthemes", "ggplot2", "kableExtra", "palmerpenguins",
  "grid", "htmltools", "plotly", "ggforce", "cowplot", "magick",
  "forcats", "stringr", "viridis"
)

# - load every package from the character vector
pacman::p_load(char = pkgs)

# - install tidyverse/dsbox directly from GitHub
# - this allows for the possible need to install after a fresh repo pull
# - and, if it's already installed, just throw an alert
if (!requireNamespace("dsbox", quietly = TRUE)) {
  message("Installing 'dsbox' from GitHub (not found locally)...")
  suppressMessages(devtools::install_github("tidyverse/dsbox"))
} else {
  message("[FYI]\n'dsbox' already installed — skipping GitHub install.")
}
# Alert the user about which packages were loaded, laid out as a grid.
# Set number of display columns (adjustable).
n_cols <- 4

# Add a "* " bullet to each package name.
# NOTE: this mutates the global `pkgs` created in the setup chunk.
pkgs <- paste0("* ", pkgs)

# Calculate the number of rows needed for the full package list.
n_rows <- ceiling(length(pkgs) / n_cols)

# Pad with empty strings so the vector fills the grid exactly.
pkgs_padded <- c(pkgs, rep("", n_rows * n_cols - length(pkgs)))

# Create the display matrix (fill by row); printed by the next chunk.
pkg_matrix <- matrix(pkgs_padded, nrow = n_rows, byrow = TRUE)

# Print the report header.
cat("The packages loaded:")
The packages loaded:
Code
# Print the package grid one row at a time, padding each cell to a
# fixed 22-character width so the columns line up. A plain for loop
# is used so nothing (not even an invisible NULL) needs suppressing.
for (i in seq_len(nrow(pkg_matrix))) {
  row <- pkg_matrix[i, ]
  cat(paste(format(row, width = 22), collapse = ""), "\n")
}
#-------------------------->
###################### Basic set Theme up ######################
## ---- set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# set width of code output
options(width = 65)

# set figure parameters for knitr
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)
## ---- end theme set up
(*@*) - function block
In an effort to reduce repeating code a function block was created.
Code
# ............ Function block: shared plot/filter helpers for Q3/Q4,
# written to minimize code duplication across the stacked panels.
#
# NOTE(review): these functions reference globals resolved at call
# time (response_labels, explanatory_labels, strip_fill_color,
# strip_placement, strip_text_size) — they must be defined before any
# plot_* function is called.

# Point size shared by every panel.
set_dot_size <- 1

# Internal helper for the middle panels (Age / Gender / Race). Those
# three plots were previously triplicated and differed only in the
# right-strip margins; this keeps each public wrapper a one-liner
# while preserving every original per-panel margin exactly.
.plot_demographic <- function(data, strip_margin) {
  ggplot(data, aes(x = explanatory_value, y = mean, group = explanatory_value)) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0.2) +
    geom_point(size = set_dot_size, color = "black") +
    coord_flip() +
    facet_grid(
      rows = vars(explanatory),
      cols = vars(response),
      labeller = labeller(explanatory = as_labeller(explanatory_labels))
    ) +
    theme_minimal(base_size = 12) +
    labs(x = NULL, y = NULL) +
    theme(
      strip.background = element_rect(fill = strip_fill_color, color = "black"),
      strip.placement = strip_placement,
      strip.text.x = element_blank(),
      strip.text.y.right = element_text(
        angle = 0,
        vjust = 0.5,
        margin = strip_margin
      ),
      axis.text.y = element_text(size = 10),
      panel.spacing = unit(1, "lines"),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
}

# Top panel ("All" row, g0): carries the overall title and the column
# strips; hides all axis text.
plot_all <- function(data) {
  ggplot(data, aes(x = explanatory_value, y = mean)) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0.2) +
    geom_point(size = set_dot_size, color = "black") +
    coord_flip() +
    facet_grid(
      rows = vars(explanatory),
      cols = vars(response),
      labeller = labeller(
        response = as_labeller(response_labels),
        explanatory = as_labeller(explanatory_labels)
      )
    ) +
    theme_minimal(base_size = 11) +
    labs(
      title = "COVID-19 Vaccine Attitudes by Demographic Group",
      x = NULL,
      y = NULL
    ) +
    theme(
      plot.title = element_text(hjust = 0.5),
      strip.background = element_rect(fill = strip_fill_color, color = "black"),
      strip.placement = strip_placement,
      strip.text.x = element_text(
        vjust = 0.5,
        size = strip_text_size,
        margin = margin(t = 20, b = 10, r = 5, l = 5)
      ),
      strip.text.y.right = element_text(
        angle = 0,
        vjust = 0.5,
        margin = margin(t = 10, b = 10, r = 15, l = 15)
      ),
      axis.text.y = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
}

# Age panel (g1).
plot_age <- function(data) {
  .plot_demographic(data, margin(t = 10, b = 10, r = 13, l = 13))
}

# Gender panel (g2).
plot_gender <- function(data) {
  .plot_demographic(data, margin(t = 10, b = 10, r = 5, l = 6))
}

# Race panel (g3).
plot_race <- function(data) {
  .plot_demographic(data, margin(t = 10, b = 10, r = 10, l = 10))
}

# Bottom panel (Ethnicity, g4): slightly smaller base size, shows the
# x axis, and carries the shared y-axis caption describing the error
# bars (via sub_title_specific).
plot_ethnicity <- function(data, sub_title_specific) {
  ggplot(data, aes(x = explanatory_value, y = mean, group = explanatory_value)) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0.2) +
    geom_point(size = set_dot_size, color = "black") +
    coord_flip() +
    facet_grid(
      rows = vars(explanatory),
      cols = vars(response),
      labeller = labeller(explanatory = as_labeller(explanatory_labels))
    ) +
    theme_minimal(base_size = 10) +
    labs(
      x = NULL,
      y = paste0("Mean Likert score\n(Error bars: ", sub_title_specific, ")")
    ) +
    theme(
      strip.background = element_rect(fill = strip_fill_color, color = "black"),
      strip.placement = strip_placement,
      strip.text.x = element_blank(),
      strip.text.y.right = element_text(
        angle = 0,
        vjust = 0.5,
        margin = margin(t = 10, b = 10, r = 4, l = 7)
      ),
      axis.text.y = element_text(size = 10),
      axis.text.x = element_text(size = 10),
      axis.ticks.x = element_line(),
      panel.spacing = unit(1, "lines")
    )
}

# ..... Data-preparation helpers: each filters one explanatory
# variable out of the summary tibble and recodes its numeric survey
# codes into human-readable factor levels.

# Ethnicity codes -> labels.
filter_ethnicity_data <- function(data) {
  data %>%
    filter(explanatory == "exp_ethnicity") %>%
    filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
    mutate(
      explanatory_value = recode(
        as.character(explanatory_value),
        "1" = "Hispanic/Latino",
        "2" = "Non-Hispanic/Non-Latino"
      ),
      explanatory_value = factor(
        explanatory_value,
        levels = c("Hispanic/Latino", "Non-Hispanic/Non-Latino")
      ),
      explanatory = factor(
        explanatory,
        levels = c("All", "exp_age_bin", "exp_gender", "exp_race", "exp_ethnicity")
      )
    )
}

# Age-bin codes -> labelled age ranges.
filter_age_data <- function(data) {
  data %>%
    filter(explanatory == "exp_age_bin") %>%
    filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
    mutate(
      explanatory_value = recode(
        as.character(explanatory_value),
        "0" = "<20",
        "20" = "21-25",
        "25" = "26-30",
        "30" = ">30"
      ),
      explanatory_value = factor(
        explanatory_value,
        levels = c("<20", "21-25", "26-30", ">30")
      ),
      explanatory = factor(
        explanatory,
        levels = c("All", "exp_age_bin", "exp_gender", "exp_race")
      )
    )
}

# Gender codes -> labels; levels reversed so "Female" plots at the top
# after coord_flip().
filter_gender_data <- function(data) {
  data %>%
    filter(explanatory == "exp_gender") %>%
    filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
    mutate(
      explanatory_value = as.character(explanatory_value),
      explanatory_value = fct_recode(
        factor(explanatory_value),
        "Prefer not to say" = "4",
        "Non-binary third gender" = "3",
        "Male" = "0",
        "Female" = "1"
      ),
      explanatory_value = factor(
        explanatory_value,
        levels = rev(c(
          "Prefer not to say",
          "Non-binary third gender",
          "Male",
          "Female"
        ))
      ),
      explanatory = factor(
        explanatory,
        levels = c("All", "exp_age_bin", "exp_gender", "exp_race")
      )
    )
}

# Race codes -> labels; levels reversed so "White" plots at the top
# after coord_flip().
filter_race_data <- function(data) {
  data %>%
    filter(explanatory == "exp_race") %>%
    filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
    mutate(
      explanatory_value = recode(
        as.character(explanatory_value),
        "1" = "American Indian/Alaska Native",
        "2" = "Asian",
        "3" = "Black/African American",
        "4" = "Native Hawaiian/Other Pacific Islander",
        "5" = "White"
      ),
      explanatory_value = factor(
        explanatory_value,
        levels = rev(c(
          "White",
          "Native Hawaiian/Other Pacific Islander",
          "Black/African American",
          "Asian",
          "American Indian/Alaska Native"
        ))
      ),
      explanatory = factor(
        explanatory,
        levels = c("All", "exp_age_bin", "exp_gender", "exp_race")
      )
    )
}
1 - Du Bois challenge.
Du Bois challenge. Recreate the following visualization by W.E.B. Du Bois on family budgets split by income classes for 150 families in Atlanta, Georgia. This visualization was originally created using ink and watercolors.
Note: Since there appears to be some allowable creativity with the features represented, I left a scale on the bottom of the parchment and left off the ‘connecting lines’ joining the same-colored segments of the stacked bar charts. It ended up being a lot of code — separately constructing and placing all pieces of the chart together. This was a first effort; there may be a more efficient way to re-make the plot. I rendered the output image as HTML, and I cannot get rid of the small ‘png 2’ label (at the moment).
png 2
A recreation of ‘Income and Expenditure of 150 Negro Families in Atlanta, GA, USA’ Some re-interpretations were taken: - connecting lines between stacked bar chart areas were left off - slightly modified ‘}’ grouping labels were used on right hand side of bar charts. - an axis was rendered on the bottom to help visualize scale
2 - COVID survey - interpretation
Q2 - Interpret what’s occurring in the survey, and discuss any results that go against your intuition. Overall description The COVID vaccine survey gathered responses from medical and nursing students across the U.S. regarding their attitudes toward vaccine safety, trust, and recommendations. The visualization arranges responses in a grid, with response variables in columns and explanatory variables (like age, profession, or gender) in rows. Each pane displays the mean Likert score and error bars between the 10th and 90th percentiles for each subgroup, offering insight into both central tendency and variability. The top row summarizes overall distributions, unconditioned by explanatory factors. - Interesting Trends in the Data: 1. Trust and Profession: Medical students displayed more variability in their agreement with the statement “I trust the information that I have received about the vaccines” compared to nursing students. While both groups leaned toward agreement, the broader spread among medical students suggests more diverse opinions, possibly reflecting deeper exposure to varying sources of information or a more analytical approach to evaluating it. 2. Concern About Side Effects and Age: Across all age groups, responses to concerns about “safety and side effects” hovered around a neutral average (Likert score ≈ 3), with relatively wide error bars. This indicates uncertainty or ambivalence. However, younger students tended to show slightly higher trust (i.e. lower concern scores), suggesting that age may play a role in perceived vaccine risk. 3. Willingness to Recommend and Vaccination History: Respondents with a history of receiving vaccines overwhelmingly strongly agreed with the statement “I will recommend the vaccine to family, friends, and community members.” This strong consensus highlights how personal experience with vaccines may positively reinforce trust and willingness to advocate for vaccination.
Overall, the data reveal meaningful variation in how medical and nursing students interpret the science and safety of COVID-19 vaccines, highlighting the complexity of attitudes even within healthcare education.
Code
#------- no code necessary ..
3 - COVID survey - reconstruct
Q3 ….
Data Analysis - Q1
📄 The original data frame (raw_preview) has:
- 1123 rows
- 14 columns
✅ Rows with only `response_id` and all other fields missing have been removed.
Original dataset rows: 1121
Rows removed: 10
Cleaned dataset size: 1111 rows × 14 columns
**Rows_Removed**
row:3
row:152
row:153
row:414
row:529
row:556
row:577
row:835
row:987
row:1050
Code
# - Step 1a: report the dimensions of the original data frame
#   (raw_preview) and of survey_clean, both created in earlier chunks.
#   glue interpolates the counts into the status message.
original_dim <- dim(raw_preview)
cat(glue(
  "📄 The original data frame (`raw_preview`) has:\n",
  "- {original_dim[1]} rows\n",
  "- {original_dim[2]} columns\n\n",
  "⚠️ Rows with no available data (i.e., only `response_id` present)\n will be removed in preprocessing.\n",
  "\n✅ **New Dimensions** of `survey_clean` after cleaning:\n",
  "📊 Rows: {nrow(survey_clean)}\n",
  "📐 Columns: {ncol(survey_clean)}\n"
))
📄 The original data frame (`raw_preview`) has:
- 1123 rows
- 14 columns
⚠️ Rows with no available data (i.e., only `response_id` present)
will be removed in preprocessing.
✅ **New Dimensions** of `survey_clean` after cleaning:
📊 Rows: 1111
📐 Columns: 14
Code
# Variable encoding (based on info in the pdf file and .csv):
#   exp_profession ........... (no question text supplied)
#   exp_flu_vax .............. (no question text supplied)
#   exp_gender ............... Q2  What is your gender?
#   exp_race ................. Q3  What is your race?
#   exp_ethnicity ............ Q4  What is your ethnicity?
#   exp_age_bin .............. Q1  What is your age?
#   exp_already_vax .......... (no question text supplied)
#   resp_safety .............. Q26 Based on my understanding, I believe the vaccine is safe.
#   resp_confidence_science .. Q34 I am confident in the scientific vetting process for the new COVID vaccines.
#   resp_concern_safety ...... Q27 I am concerned about the safety and side effects of the vaccine.
#   resp_feel_safe_at_work ... Q28 Getting the vaccine will make me feel safer at work.
#   resp_will_recommend ...... Q29 I will recommend the vaccine to family, friends, and community members.
#   resp_trust_info .......... Q31 I trust the information that I have received about the COVID-19 vaccines.

# Pass 1: pivot the explanatory (exp_*) columns to long form, coerce
# the values to factors, and drop missing explanatory values.
covid_survey_longer <- survey_clean %>%
  pivot_longer(
    cols = starts_with("exp_"),
    names_to = "explanatory",
    values_to = "explanatory_value"
  ) %>%
  mutate(explanatory_value = as.factor(explanatory_value)) %>%
  filter(!is.na(explanatory_value))

# Pass 2: pivot the remaining response (resp_*) columns to long form.
covid_survey_longer <- covid_survey_longer %>%
  pivot_longer(
    cols = starts_with("resp_"),
    names_to = "response",
    values_to = "response_value"
  )

print(covid_survey_longer)
first pivot_longer(): Converts all columns that start with “exp_” (e.g., exp_profession, exp_gender, etc.) from wide format into long format. Creates two new columns: explanatory: holds the original column names (like “exp_profession”) explanatory_value: holds the actual values from those columns (like “Nursing” or “1”) second pivot_longer(): After already pivoting the explanatory variables, this takes the remaining response variables (resp_safety, resp_confidence_science, etc.) and pivots them long as well. Creates two new columns: response: original column name response_value: corresponding value
create the df/tibble: covid_survey_summary_stats_by_group
Code
# Group the data by (explanatory, explanatory_value, response) and
# compute, per group:
#   - mean of response_value
#   - low:  10th percentile of response_value
#   - high: 90th percentile of response_value
# The result is stored as covid_survey_summary_stats_by_group.
covid_survey_summary_stats_by_group <- covid_survey_longer %>%
  group_by(explanatory, explanatory_value, response) %>%
  summarise(
    mean = mean(response_value, na.rm = TRUE),
    low = quantile(response_value, probs = 0.10, na.rm = TRUE),
    high = quantile(response_value, probs = 0.90, na.rm = TRUE),
    .groups = "drop"
  )

print(covid_survey_summary_stats_by_group)
# A tibble: 6 × 6
response mean low high explanatory explanatory_value
<chr> <dbl> <dbl> <dbl> <chr> <fct>
1 resp_concern_s… 3.28 1 5 All ""
2 resp_confidenc… 1.43 1 2 All ""
3 resp_feel_safe… 1.36 1 2 All ""
4 resp_safety 2.03 1 5 All ""
5 resp_trust_info 1.40 1 2 All ""
6 resp_will_reco… 1.21 1 2 All ""
Code
#View(covid_survey_summary_stats_all)
Bind the two df’s create the df/tibble: covid_summary_of_stats
Code
# Get the existing factor levels from the grouped summary data.
age_levels <- levels(covid_survey_summary_stats_by_group$explanatory_value)

# Add an empty-string level to represent the overall 'All' group.
age_levels_with_all <- c(age_levels, "")

# Create the all-group summary (ungrouped over every explanatory
# variable) using the extended factor level set, so it can bind
# cleanly against the grouped summary.
covid_survey_summary_stats_all <- covid_survey_longer |>
  group_by(response) |>
  summarise(
    mean = mean(response_value, na.rm = TRUE),
    low = quantile(response_value, probs = 0.10, na.rm = TRUE),
    high = quantile(response_value, probs = 0.90, na.rm = TRUE),
    explanatory = "All",
    explanatory_value = factor("", levels = age_levels_with_all),
    .groups = "drop"
  )

# Ensure the grouped summary shares the same levels too (avoids
# factor-level clashes during bind_rows).
covid_survey_summary_stats_by_group$explanatory_value <- factor(
  covid_survey_summary_stats_by_group$explanatory_value,
  levels = age_levels_with_all
)

# Bind them safely now: 'All' rows first, then the grouped rows.
covid_survey_summary_stats <- bind_rows(
  covid_survey_summary_stats_all,
  covid_survey_summary_stats_by_group
)

print(covid_survey_summary_stats)
# A tibble: 132 × 6
response mean low high explanatory explanatory_value
<chr> <dbl> <dbl> <dbl> <chr> <fct>
1 resp_concern_… 3.28 1 5 All ""
2 resp_confiden… 1.43 1 2 All ""
3 resp_feel_saf… 1.36 1 2 All ""
4 resp_safety 2.03 1 5 All ""
5 resp_trust_in… 1.40 1 2 All ""
6 resp_will_rec… 1.21 1 2 All ""
7 resp_concern_… 3.35 2 4.4 exp_age_bin "0"
8 resp_confiden… 1.65 1 2.4 exp_age_bin "0"
9 resp_feel_saf… 1.71 1 3.8 exp_age_bin "0"
10 resp_safety 1.41 1 2 exp_age_bin "0"
# ℹ 122 more rows
Q3e - recreate plot
Code
# Labels for rows (explanatory variables), including Gender and Race.
explanatory_labels <- c(
  All = "All",
  exp_age_bin = "Age",
  exp_gender = "Gender",
  exp_race = "Race", # Added Race label
  exp_ethnicity = "Ethnicity"
)

# - call the formatting helpers for each encoded demographic slice
covid_age_only <- filter_age_data(covid_survey_summary_stats_by_group)
covid_gender_only <- filter_gender_data(covid_survey_summary_stats_by_group)
covid_race_only <- filter_race_data(covid_survey_summary_stats_by_group)
covid_ethnicity_only <- filter_ethnicity_data(covid_survey_summary_stats_by_group)

# Label mappings for the response (column) facets.
response_labels <- c(
  resp_safety = "Vaccine is safe",
  resp_feel_safe_at_work = "Feel safer\n at work",
  resp_concern_safety = "Concern about \nvaccine safety",
  resp_confidence_science = "Confidence in \nscientific vetting",
  resp_trust_info = "Trust in \nvaccine info",
  resp_will_recommend = "Will recommend\nvaccine"
)

# Reorder response factor levels to match response_labels so the facet
# columns line up in every stacked panel.
# BUG FIX: the race and ethnicity layers previously skipped this
# reordering, so their facet columns could render in a different
# (alphabetical) order than the All/Age/Gender panels, misaligning the
# composite plot.
covid_age_only <- covid_age_only %>%
  mutate(response = factor(response, levels = names(response_labels)))
covid_gender_only <- covid_gender_only %>%
  mutate(response = factor(response, levels = names(response_labels)))
covid_race_only <- covid_race_only %>%
  mutate(response = factor(response, levels = names(response_labels)))
covid_ethnicity_only <- covid_ethnicity_only %>%
  mutate(response = factor(response, levels = names(response_labels)))

# View the distinct codes used in the exp_ethnicity variable.
covid_survey_summary_stats_by_group %>%
  filter(explanatory == "exp_ethnicity") %>%
  mutate(explanatory_value = as.character(explanatory_value)) %>%
  distinct(explanatory_value) %>%
  arrange(explanatory_value)
# A tibble: 2 × 1
explanatory_value
<chr>
1 1
2 2
Code
# Vector controlling the relative heights of each stacked plot row.
row_heights <- c(
  0.5, # - 'All' row height — adjust as needed
  3,   # - 'exp_age_bin' row height
  3,   # - 'exp_gender' row height - adjust as desired
  3,   # - 'exp_race' row height - new Race row
  3    # - ethnicity
)

# Reorder response factor levels for the 'All' layer so its facet
# columns match the demographic panels below it.
covid_all_only <- covid_survey_summary_stats_all %>%
  filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
  mutate(response = factor(response, levels = names(response_labels)))

# - vars for standardizing box size row/col
# Define variables for strip (facet label) appearance; these are read
# inside the plot_* functions at call time.
strip_fill_color <- "gray90"
strip_text_color <- "black"
strip_text_size <- 10
strip_text_face <- "plain"
strip_text_angle_x <- 0
strip_text_angle_y <- 0
strip_text_vjust_y <- 0.5
strip_placement <- "outside" # already used by the plot functions

# Build each layer of the composite.
g0 <- plot_all(covid_all_only)       # top layer - 'All'
g1 <- plot_age(covid_age_only)       # - second layer - Age
g2 <- plot_gender(covid_gender_only) # - third layer - gender
g3 <- plot_race(covid_race_only)     # Fourth layer - Race
# Fifth layer: Ethnicity (if present); second argument becomes the
# shared y-axis caption describing the error bars.
g4 <- plot_ethnicity(
  covid_ethnicity_only,
  "Error bars in range from 10th to 90th percentile"
)

# Composite plot with 5 layers stacked (All / Age / Gender / Race / Ethnicity),
# using patchwork's `/` stacking and shared zero margins.
composite_plot <- (g0 / g1 / g2 / g3 / g4 +
  plot_layout(heights = row_heights)) &
  theme(plot.margin = margin(0, 0, 0, 0))

print(composite_plot)
A reconstruction of the table on COVID-19 opinions from: The data were collected by Pavan Shah, Giorgio Caturegli, Galen Shi, and Joshua Materi at Johns Hopkins School of Medicine. Note: Error bars shown range from the 10th to the 90th percentile. Note: Not all data was supplied to complete a full reconstruction of the table.
#>>>>based on info in pdf file and .csv .. encode the following
# exp_profession...........
# exp_flu_vax..............
# exp_gender............... Q2 What is your gender?
# exp_race................. Q3 What is your race?
# exp_ethnicity............ Q4 What is your ethnicity?
# exp_age_bin.............. Q1 What is your age?
# exp_already_vax..........
# resp_safety.............. Q26 Based on my understanding, I believe the vaccine is safe.
# resp_confidence_science.. Q34 I am confident in the scientific vetting process for the new COVID vaccines.
# resp_concern_safety...... Q27 I am concerned about the safety and side effects of the vaccine.
# resp_feel_safe_at_work... Q28 Getting the vaccine will make me feel safer at work.
# resp_will_recommend...... Q29 I will recommend the vaccine to family, friends, and community members.
# resp_trust_info.......... Q31 I trust the information that I have received about the COVID-19 vaccines.
4 - COVID survey - re-reconstruct
Q4 …. Make the plot from Q3, but use different end-point quantiles. When the error bars represent the 25th and 75th percentiles instead of the 10th and 90th, the intervals become narrower, reflecting a tighter range around the median of the data. This change reduces the apparent variability and uncertainty in responses. Compared to the previous plot, the shorter error bars may make the group differences appear more precise but potentially understate the true variability. Therefore, while the overall trends remain similar, conclusions about the degree of uncertainty should be adjusted to recognize that the interquartile range excludes more extreme values.
# A tibble: 132 × 6
response mean low high explanatory explanatory_value
<chr> <dbl> <dbl> <dbl> <chr> <fct>
1 resp_concern_… 3.28 2 4 All ""
2 resp_confiden… 1.43 1 2 All ""
3 resp_feel_saf… 1.36 1 1 All ""
4 resp_safety 2.03 1 3 All ""
5 resp_trust_in… 1.40 1 2 All ""
6 resp_will_rec… 1.21 1 1 All ""
7 resp_concern_… 3.35 2 4 exp_age_bin "0"
8 resp_confiden… 1.65 1 2 exp_age_bin "0"
9 resp_feel_saf… 1.71 1 2 exp_age_bin "0"
10 resp_safety 1.41 1 2 exp_age_bin "0"
# ℹ 122 more rows
A reconstruction of the table on COVID-19 opinions from: The data were collected by Pavan Shah, Giorgio Caturegli, Galen Shi, and Joshua Materi at Johns Hopkins School of Medicine. Note: Error bars shown range from the 25th to the 75th percentile. Note: Not all data was supplied to complete a full reconstruction of the table.
5 - COVID survey - another view
Q5a …. COVID survey - another view. Create two bar charts of the Likert data for the six survey questions in from the plot in Exercise 2. This should be a single plot visualizing the percentages of each possible answer, with different questions on the y-axis. Use an appropriate color scale.
a. Create a diverging bar chart. Write alt text for your visualization as well. Write alt text for your visualization as well.
Code
# Define response labels (y-axis facet names for the Likert charts).
response_labels <- c(
  resp_safety = "Vaccine is safe",
  resp_feel_safe_at_work = "Feel safer\n at work",
  resp_concern_safety = "Concern about \nvaccine safety",
  resp_confidence_science = "Confidence in \nscientific vetting",
  resp_trust_info = "Trust in \nvaccine info",
  resp_will_recommend = "Will recommend\nvaccine"
)

# Step 1: compute the % of answers per question and response_value.
likert_summary <- covid_survey_longer %>%
  group_by(response, response_value) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(response) %>%
  mutate(percent = count / sum(count) * 100) %>%
  ungroup() %>%
  mutate(
    response = factor(response, levels = unique(response)), # preserve order
    response_value = factor(response_value, levels = 1:5)
  )

# Center percentages for the diverging bar chart (v1): the disagree
# side (1-2) becomes negative, neutral (3) is zero, agree (4-5) stays
# positive.
# BUG FIX: response_value is a factor at this point and `<` / `>` are
# not defined for unordered factors — the original comparison raised
# "'<' not meaningful for factors" warnings and yielded NA for those
# branches. Compare on the underlying numeric code instead; rows with
# a missing response remain NA, as before.
likert_summary <- likert_summary %>%
  mutate(
    value_num = as.integer(as.character(response_value)),
    centered_percent = case_when(
      value_num < 3 ~ -percent,
      value_num == 3 ~ 0,
      value_num > 3 ~ percent
    )
  )
Warning: There were 2 warnings in `mutate()`.
The first warning was:
ℹ In argument: `centered_percent = case_when(...)`.
Caused by warning in `Ops.factor()`:
! '<' not meaningful for factors
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining
warning.
response response_value count
Length:36 Min. :1 Min. : 75
Class :character 1st Qu.:2 1st Qu.: 213
Mode :character Median :3 Median : 346
Mean :3 Mean :1206
3rd Qu.:4 3rd Qu.:1484
Max. :5 Max. :6077
NA's :6
percent mean_value deviation
Min. : 1.036 Min. :1.212 Min. :-2.2777
1st Qu.: 2.943 1st Qu.:1.357 1st Qu.:-0.1676
Median : 4.780 Median :1.417 Median : 1.2656
Mean :16.667 Mean :1.786 Mean : 1.2142
3rd Qu.:20.506 3rd Qu.:2.034 3rd Qu.: 2.5919
Max. :83.960 Max. :3.278 Max. : 3.7879
NA's :6
centered_percent
Min. :-59.794
1st Qu.: -2.306
Median : 4.569
Mean : 0.000
3rd Qu.: 6.524
Max. : 49.331
NA's :6
Code
# Diverging bar chart (centered around the question mean), with
# descriptive y-axis labels pulled from response_labels. geom_col()
# is the shorthand for geom_bar(stat = "identity").
g5 <- ggplot(
  likert_summary,
  aes(
    x = centered_percent,
    y = fct_rev(fct_relabel(factor(response), ~ response_labels[.x])),
    fill = factor(response_value)
  )
) +
  geom_col(width = 0.7) +
  scale_fill_manual(
    values = c(
      "1" = "#d73027", "2" = "#fc8d59", "3" = "#ffffbf",
      "4" = "#91bfdb", "5" = "#4575b4"
    ),
    name = "Likert response",
    labels = c(
      "Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree"
    )
  ) +
  labs(
    title = "Diverging Bar Chart of Likert Responses Centered Around Question Mean",
    x = "Deviation × Percentage",
    y = "Survey question"
  ) +
  theme_minimal()

print(g5)
Warning: Removed 6 rows containing missing values or values outside the
scale range (`geom_bar()`).
Q5a ….alt text description Alt text: A diverging bar chart visualizes Likert-scale responses to six COVID-19 vaccine-related statements. Each statement is represented by a horizontal bar segmented by response categories: strongly disagree, disagree, neutral, agree, strongly agree, and NA, each in a distinct color. The x-axis shows a centered scale ranging from -60 to 60, indicating deviation from the mean response, scaled by percent. Statements include concerns about safety, trust in vaccine information, and willingness to recommend vaccination. The chart reveals variation in sentiment and notable asymmetries across response patterns.
Q5b …. COVID survey - another view. Create two bar charts of the Likert data for the six survey questions in from the plot in Exercise 2. This should be a single plot visualizing the percentages of each possible answer, with different questions on the y-axis. Use an appropriate color scale. b. Create a 100% bar chart Write alt text for your visualization as well.
Code
# Make sure response_value is a factor (levels 1-5) for the 100% chart.
likert_summary <- likert_summary %>%
  mutate(response_value = factor(response_value, levels = 1:5))

# 100% stacked bar chart, with descriptive y-axis labels.
# NOTE(review): bars only total exactly 100% for questions with no NA
# responses (NA rows are dropped by the fill scale) — confirm whether
# position = "fill" was intended for strict normalization.
g100 <- ggplot(
  likert_summary,
  aes(
    x = percent,
    y = fct_rev(fct_relabel(factor(response), ~ response_labels[.x])),
    fill = response_value
  )
) +
  geom_col(width = 0.7) +
  scale_fill_manual(
    values = c(
      "1" = "#d73027", "2" = "#fc8d59", "3" = "#ffffbf",
      "4" = "#91bfdb", "5" = "#4575b4"
    ),
    name = "Response",
    labels = c(
      "Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree"
    )
  ) +
  labs(
    title = "100% Stacked Bar Chart of Likert Responses",
    x = "Percentage of responses",
    y = "Survey question"
  ) +
  theme_minimal()

print(g100)
Q5b …. alt text description Alt text: A 100% stacked bar chart displays Likert-scale survey responses to six statements about COVID-19 vaccines. Each bar corresponds to a different statement, such as “Concern about vaccine safety” and “Will recommend vaccine.” The bars are divided into color-coded segments representing response options: Strongly disagree (red), Disagree (orange), Neutral (yellow), Agree (light blue), Strongly agree (dark blue), and NA (gray). The x-axis shows percentage values from 0 to 100%, allowing for comparison of relative agreement across the questions. The chart highlights variation in public sentiment—some questions skew toward strong agreement, while others show more mixed or polarized responses.
Source Code
---title: "HW 03"author: "Nathan Herling"date: "2025-06-20"_due: "Friday-June-13-2025" format: html: embed-resources: true code-fold: true code-tools: true toc: true css: styles.css # ✅ Add this line to link your CSS file---```{r setup}#--------------------->################## Package Setup ###################Check if pacman [package manager] is installed, if not install it.#throw [FYI] alert either way.if (!requireNamespace("pacman", quietly = TRUE)) { message("Installing 'pacman' (not found locally)...") install.packages("pacman")} else { message("[FYI]\n'pacman' already installed — skipping install.")}# use this line for installing/loading# pacman::p_load()# - packages to load stored in a variable (vector)pkgs <- c( "tidyverse", "glue", "scales", "lubridate", "patchwork", "ggh4x", "ggrepel", "openintro", "ggridges", "dsbox", "janitor", "here", "knitr", "ggthemes", "ggplot2", "kableExtra", "palmerpenguins", "grid", "htmltools", "plotly", "ggforce", "cowplot", "magick", "forcats", "stringr", "viridis")# - load from the character array/vectorpacman::p_load(char=pkgs)# - install tidyverse/dsbox directly from Git Hub# - this allows for the possible need to install on a repo. 
pull.# - and, if it's already installed just thorw an alert.if (!requireNamespace("dsbox", quietly = TRUE)) { message("Installing 'dsbox' from GitHub (not found locally)...") suppressMessages(devtools::install_github("tidyverse/dsbox"))} else { message("[FYI]\n'dsbox' already installed — skipping GitHub install.")}# - alert to user packages loaded.# Set number of columns (adjustable)n_cols <- 4# Add * to each package namepkgs <- paste0("* ", pkgs)# Calculate number of rows based on total packagesn_rows <- ceiling(length(pkgs) / n_cols)# Pad with empty strings to complete gridpkgs_padded <- c(pkgs, rep("", n_rows * n_cols - length(pkgs)))# Create matrix (fill by row)pkg_matrix <- matrix(pkgs_padded, nrow = n_rows, byrow = TRUE)# Print headercat("The packages loaded:")# Loop and print each row (use invisible to suppress NULL)invisible(apply(pkg_matrix, 1, function(row) { cat(paste(format(row, width = 22), collapse = ""), "\n")}))#-------------------------->####################### Basic set Theme up ######################## ---- set theme for ggplot2ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))# set width of code outputoptions(width = 65)# set figure parameters for knitrknitr::opts_chunk$set( fig.width = 7, # 7" width fig.asp = 0.618, # the golden ratio fig.retina = 3, # dpi multiplier for displaying HTML output on retina fig.align = "center", # center align figures dpi = 300 # higher dpi, sharper image)## ---- end theme set up```## (*@*) - function block<div class="question-box">In an effort to reduce repeating code a function block was created.</div>```{r}#| label: label-me-007# ............ 
# A function block to handle Q3/Q4 with minimal code duplication.
#
# The four demographic panels (age, gender, race, ethnicity) were
# previously four near-identical ggplot pipelines that differed only in
# base font size, the right-strip label margins, and whether the x axis
# is drawn. They are now thin wrappers around one shared builder, so a
# future tweak to the panel style happens in exactly one place.
#
# Relies on globals defined elsewhere in this document:
#   strip_fill_color, strip_placement, strip_text_size,
#   response_labels, explanatory_labels.

# Dot size shared by every panel.
set_dot_size <- 1

# Top panel (g0): the "All" row.
# Unlike the demographic panels it carries the plot title, labels both
# facet dimensions, and hides all axis text.
plot_all <- function(data) {
  ggplot(data, aes(x = explanatory_value, y = mean)) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0.2) +
    geom_point(size = set_dot_size, color = "black") +
    coord_flip() +
    facet_grid(
      rows = vars(explanatory),
      cols = vars(response),
      labeller = labeller(
        response = as_labeller(response_labels),
        explanatory = as_labeller(explanatory_labels)
      )
    ) +
    theme_minimal(base_size = 11) +
    labs(
      title = "COVID-19 Vaccine Attitudes by Demographic Group",
      x = NULL,
      y = NULL
    ) +
    theme(
      plot.title = element_text(hjust = 0.5),
      strip.background = element_rect(fill = strip_fill_color, color = "black"),
      strip.placement = strip_placement,
      strip.text.x = element_text(
        vjust = 0.5,
        size = strip_text_size,
        margin = margin(t = 20, b = 10, r = 5, l = 5)
      ),
      strip.text.y.right = element_text(
        angle = 0,
        vjust = 0.5,
        margin = margin(t = 10, b = 10, r = 15, l = 15)
      ),
      axis.text.y = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
}

# Shared builder for the demographic panels (g1-g4).
#
# Arguments:
#   data        summary tibble with explanatory_value / mean / low / high
#   base_size   theme_minimal() base font size
#   strip_r     right margin (pt) of the right-hand strip labels
#   strip_l     left margin (pt) of the right-hand strip labels
#   y_lab       y-axis label (NULL = none; only the bottom panel sets one)
#   show_x_axis draw x-axis text and ticks? (only the bottom panel does)
#
# Every value below mirrors the original per-panel pipelines exactly.
build_group_panel <- function(data, base_size, strip_r, strip_l,
                              y_lab = NULL, show_x_axis = FALSE) {
  # Bottom panel shows the shared x axis; the stacked panels suppress it.
  x_text  <- if (show_x_axis) element_text(size = 10) else element_blank()
  x_ticks <- if (show_x_axis) element_line() else element_blank()
  ggplot(data, aes(x = explanatory_value, y = mean, group = explanatory_value)) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0.2) +
    geom_point(size = set_dot_size, color = "black") +
    coord_flip() +
    facet_grid(
      rows = vars(explanatory),
      cols = vars(response),
      labeller = labeller(
        explanatory = as_labeller(explanatory_labels)
      )
    ) +
    theme_minimal(base_size = base_size) +
    labs(x = NULL, y = y_lab) +
    theme(
      strip.background = element_rect(fill = strip_fill_color, color = "black"),
      strip.placement = strip_placement,
      # Column headers appear only on the "All" panel above.
      strip.text.x = element_blank(),
      strip.text.y.right = element_text(
        angle = 0,
        vjust = 0.5,
        margin = margin(t = 10, b = 10, r = strip_r, l = strip_l)
      ),
      axis.text.y = element_text(size = 10),
      panel.spacing = unit(1, "lines"),
      axis.text.x = x_text,
      axis.ticks.x = x_ticks
    )
}

# Function for the Age plot (g1).
plot_age <- function(data) {
  build_group_panel(data, base_size = 12, strip_r = 13, strip_l = 13)
}

# Function for the Gender plot (g2).
plot_gender <- function(data) {
  build_group_panel(data, base_size = 12, strip_r = 5, strip_l = 6)
}

# Function for the Race plot (g3).
plot_race <- function(data) {
  build_group_panel(data, base_size = 12, strip_r = 10, strip_l = 10)
}

# Function for the Ethnicity plot (g4): bottom panel, so it carries the
# shared y-axis caption (with the error-bar definition) and the x axis.
plot_ethnicity <- function(data, sub_title_specific) {
  build_group_panel(
    data,
    base_size = 10,
    strip_r = 4,
    strip_l = 7,
    y_lab = paste0("Mean Likert score\n(Error bars: ", sub_title_specific, ")"),
    show_x_axis = TRUE
  )
}

# ..... prepare the variables.
# Each filter_* helper subsets the long summary table to one explanatory
# variable, drops non-finite rows, and recodes the coded levels to
# human-readable labels in the display order used by the panels.

# . ethnicity
filter_ethnicity_data <- function(data) {
  data %>%
    filter(explanatory == "exp_ethnicity") %>%
    filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
    mutate(
      explanatory_value = recode(
        as.character(explanatory_value),
        "1" = "Hispanic/Latino",
        "2" = "Non-Hispanic/Non-Latino"
      ),
      explanatory_value = factor(
        explanatory_value,
        levels = c("Hispanic/Latino", "Non-Hispanic/Non-Latino")
      ),
      explanatory = factor(
        explanatory,
        levels = c("All", "exp_age_bin", "exp_gender", "exp_race", "exp_ethnicity")
      )
    )
}

# . age
# Coded bin starts map to display ranges: 0 -> <20, 20 -> 21-25, etc.
filter_age_data <- function(data) {
  data %>%
    filter(explanatory == "exp_age_bin") %>%
    filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
    mutate(
      explanatory_value = recode(
        as.character(explanatory_value),
        "0" = "<20",
        "20" = "21-25",
        "25" = "26-30",
        "30" = ">30"
      ),
      explanatory_value = factor(
        explanatory_value,
        levels = c("<20", "21-25", "26-30", ">30")
      ),
      explanatory = factor(
        explanatory,
        levels = c("All", "exp_age_bin", "exp_gender", "exp_race")
      )
    )
}

# . gender
# Levels are reversed so that after coord_flip() "Female" appears first.
filter_gender_data <- function(data) {
  data %>%
    filter(explanatory == "exp_gender") %>%
    filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
    mutate(
      explanatory_value = as.character(explanatory_value),
      explanatory_value = fct_recode(
        factor(explanatory_value),
        "Prefer not to say" = "4",
        "Non-binary third gender" = "3",
        "Male" = "0",
        "Female" = "1"
      ),
      explanatory_value = factor(
        explanatory_value,
        levels = rev(c(
          "Prefer not to say",
          "Non-binary third gender",
          "Male",
          "Female"
        ))
      ),
      explanatory = factor(
        explanatory,
        levels = c("All", "exp_age_bin", "exp_gender", "exp_race")
      )
    )
}

# . 
racefilter_race_data <-function(data) { data %>%filter(explanatory =="exp_race") %>%filter(is.finite(mean), is.finite(low), is.finite(high)) %>%mutate(explanatory_value =recode(as.character(explanatory_value),"1"="American Indian/Alaska Native","2"="Asian","3"="Black/African American","4"="Native Hawaiian/Other Pacific Islander","5"="White" ),explanatory_value =factor(explanatory_value, levels =rev(c("White","Native Hawaiian/Other Pacific Islander","Black/African American","Asian","American Indian/Alaska Native" ))),explanatory =factor(explanatory, levels =c("All", "exp_age_bin", "exp_gender", "exp_race")) )}```## 1 - Du Bois challenge.<div class="question-box">Du Bois challenge. Recreate the following visualization by W.E.B. Du Bois on family budgets split by income classes for 150 families in Atlanta, Georgia. This visualization was originally created using ink and watercolors.</div><div class="note-box">Note: Since there appears to be some allowable creativity with the features reperesented.I left a scale on the bottom of the parchment, and left off the 'connecting lines' connecting the same colored segments together for the stacked bar charts.It ended up being <b>a lot</b> of code - to separately construct and place all pieces of the chart together.First effort. 
There may be a more efficient way to re-make the plot?.I rendered the output image as html - and I cannot git rid of the small 'png 2' label (atm).</div>```{r,fig.width=12, fig.height=8, out.width="95%",fig.cap=NULL, fig.scap=NULL, fig.lp=NULL}#| label: label-me-1#| echo: false#| results: asis# Load the dataincome_data <- read_csv("data/income.csv", show_col_types = FALSE)# --- New: Extract data for col_0, row_1..5 overlay table ---table_data <- income_data |> select(Class, `Actual Average` = Average_Income) |> mutate(`Actual Average` = dollar(`Actual Average`)) # Format as US Dollars# Add header rowheader_row <- tibble(Class = "Class", `Actual Average` = "ACTUAL AVERAGE")table_data <- bind_rows(header_row, table_data)# Load parchment imageinvisible(background_img <- image_read("images/parchment_sheet.png"))#- Get image dimensions ... for initial examination of how large the background image is.invisible(info <- image_info(background_img))img_width <- info$widthimg_height <- info$height# Margins and title heightmargin <- 40title_height <- 100gap_below_title <- -50# Create transparent canvas for the title texttitle_canvas <- image_blank(width = img_width, height = title_height + margin, color = "none")# Annotate the titletitle_text <- "INCOME AND EXPENDITURE OF 150 NEGRO FAMILIES IN ATLANTA,GA.,USA."title_layer <- image_annotate( title_canvas, text = title_text, size = 25, gravity = "north", location = "+0+40", font = "IM FELL English SC", weight = 700, color = "#000000BB")title_layer <- image_blur(title_layer, radius = 0.5, sigma = 0.3)composite_img <- image_composite(background_img, title_layer, offset = "+0+0")# Compute usable space for grid below titleusable_width <- img_width - 2 * marginusable_height <- img_height - margin - title_heightn_cols <- 6n_rows <- 4cell_width <- usable_width / n_colscell_height <- usable_height / n_rows# Grid positionsgrid_top <- margin + title_height + gap_below_titlegrid_bottom <- img_height - margingrid_left <- margingrid_right <- 
img_width - margin# Read and resize image to fit cell (0,0)annual_income_img <- image_read("images/annual_income.png")annual_income_img_resized <- image_scale(annual_income_img, geometry = paste0(cell_width, "x", cell_height, "!"))x_pos <- grid_lefty_pos <- grid_topcomposite_img <- image_composite(composite_img, annual_income_img_resized, offset = paste0("+", x_pos, "+", y_pos))# Add label with border across cols 1 to 5span_x_left <- grid_left + cell_width * 1span_x_right <- grid_left + cell_width * 6span_width <- span_x_right - span_x_leftlabel_text <- image_blank(width = span_width, height = 50, color = "none")label_text <- image_annotate( label_text, text = "ANNUAL EXPENDITURE FOR", size = 25, gravity = "center", font = "IM FELL English SC", weight = 0, color = "#000000BB")label_trimmed <- image_trim(label_text)label_padded <- image_extent(label_trimmed, geometry = paste0(span_width, "x", image_info(label_trimmed)$height), gravity = "center")label_with_border <- image_border(label_padded, color = "black", geometry = "2x2")label_height <- image_info(label_with_border)$heightlabel_y <- grid_top - label_height + 2label_x <- span_x_leftcomposite_img <- image_composite( composite_img, label_with_border, offset = paste0("+", round(label_x), "+", round(label_y)))# ................# Add column titles (RENT, FOOD, etc.)top_col_titles <- c("RENT", "FOOD", "CLOTHES", "DIRECT TAXES", "OTHER EXPENSES AND SAVINGS")title_height_area <- 15 # uniform height for all title boxesfor (i in 1:5) { title_x_left <- grid_left + (i * cell_width) title_width <- cell_width # Estimate font size to fit the column width max_text_width <- title_width - 10 # padding title_font_size <- 25 title_box_temp <- image_blank(width = title_width, height = title_height_area, color = "none") repeat { title_test <- image_annotate( title_box_temp, text = top_col_titles[i], size = title_font_size, gravity = "center", font = "Broadway", weight = 0, color = "#000000BB" ) if (image_info(title_test)$width <= 
max_text_width || title_font_size <= 9.5) break title_font_size <- title_font_size - 1 } title_box <- image_blank(width = title_width, height = title_height_area, color = "none") title_annotated <- image_annotate( title_box, text = top_col_titles[i], size = title_font_size, gravity = "center", font = "IM FELL English SC", weight = 0, color = "#000000BB" ) title_with_border <- image_border(title_annotated, color = "black", geometry = "2x2") title_y <- grid_top - title_height_area + 15 title_x <- title_x_left composite_img <- image_composite( composite_img, title_with_border, offset = paste0("+", round(title_x), "+", round(title_y)) )}# --- NEW: Add image icons below headers and above footers ---images_col_headers <- c( "images/rent.jpg", "images/food.png", "images/clothes.png", "images/direct_taxes.png", "images/other_expenses_savings.png")icon_height <- 132icon_gap_top <- 3icon_gap_bottom <- 3for (i in 1:5) { icon_x_left <- grid_left + (i * cell_width) icon_width <- cell_width icon_y <- grid_top + title_height_area + icon_gap_top icon_img <- image_read(images_col_headers[i]) icon_resized <- image_scale(icon_img, geometry = paste0(icon_width, "x", icon_height, "!")) icon_with_border <- image_border(icon_resized, color = "black", geometry = "2x2") composite_img <- image_composite( composite_img, icon_with_border, offset = paste0("+", round(icon_x_left), "+", round(icon_y)) )}# --- Restore column footers with colors for columns 1 to 5 ---c_palette <- c("#1B1B19", "#755D75", "#C18C7E", "#ab9f9d", "#d9e6e8")footer_height_area <- title_height_areafor (i in 1:5) { footer_x_left <- grid_left + (i * cell_width) footer_width <- cell_width footer_box <- image_blank(width = footer_width, height = footer_height_area, color = c_palette[i]) footer_with_border <- image_border(footer_box, color = "black", geometry = "2x2") footer_y <- grid_top + cell_height - footer_height_area - 4 footer_x <- footer_x_left composite_img <- image_composite( composite_img, footer_with_border, offset 
= paste0("+", round(footer_x), "+", round(footer_y)) )}#--------------------------------------------------------------------->>> checked#.... LEFT COLUMN TABLE ==================================>>># --- NEW: Render table in col_0, rows 0 to 6 (header + 6 rows) ---# --- UPDATED: Render table spanning from row_1 to row_5 (entire vertical height of 4 grid rows) ---library(stringr) # make sure stringr is loaded# Add horizontal scale factor (e.g., 0.85 means 85% width)horizontal_scale <- 0.91col_width <- (cell_width / 2) * horizontal_scale # Scaled column widthcell_x <- grid_leftnum_rows <- nrow(table_data)table_top_y <- grid_top + cell_height + 5 # start at row_1 - manually adjust height(s)table_total_height <- 4 * cell_height # span rows 1 to 5 (4 grid rows)header_row_height <- 20 # smaller height for headerdata_rows_height <- table_total_height - header_row_heightdata_row_height <- data_rows_height / (num_rows - 1) - 25 # divide remaining height among data rows# Define wrap width — adjust based on your cell width and font sizewrap_width <- 10# Render header rowrow_y <- table_top_y# Wrap the header cell text (Class)class_text_wrapped <- str_wrap(table_data$Class[1], width = wrap_width)class_box <- image_blank(width = col_width, height = header_row_height, color = "none")class_box <- image_annotate( class_box, text = class_text_wrapped, size = 9, gravity = "center", font = "IM FELL English SC", weight = 300, color = "#000000DD")class_box_bordered <- image_border(class_box, color = "black", geometry = "1x1")# income header as beforeincome_box <- image_blank(width = col_width, height = header_row_height, color = "none")income_box <- image_annotate( income_box, text = table_data$`Actual Average`[1], size = 7.5, gravity = "center", font = "IM FELL English SC", weight = 300, color = "#000000DD")income_box_bordered <- image_border(income_box, color = "black", geometry = "1x1")composite_img <- image_composite(composite_img, class_box_bordered, offset = paste0("+", cell_x, 
"+", round(row_y)))composite_img <- image_composite(composite_img, income_box_bordered, offset = paste0("+", cell_x + col_width, "+", round(row_y)))row_y <- row_y + header_row_height# Render data rows with pale brownish backgroundfor (i in 2:num_rows) { bg_col <- ifelse(i %% 2 == 0, "#E3D6BD99", "#FFFFFF00") class_bg <- image_blank(width = col_width, height = data_row_height, color = bg_col) income_bg <- image_blank(width = col_width, height = data_row_height, color = bg_col) # Wrap class cell text here class_text_wrapped <- str_wrap(table_data$Class[i], width = wrap_width) class_cell <- image_annotate( class_bg, text = class_text_wrapped, size = 9, gravity = "center", font = "IM FELL English SC", weight = 300, color = "#000000DD" ) income_cell <- image_annotate( income_bg, text = table_data$`Actual Average`[i], size = 9, gravity = "center", font = "IM FELL English SC", weight = 300, color = "#000000DD" ) class_cell_bordered <- image_border(class_cell, color = "black", geometry = "1x1") income_cell_bordered <- image_border(income_cell, color = "black", geometry = "1x1") composite_img <- image_composite(composite_img, class_cell_bordered, offset = paste0("+", cell_x, "+", round(row_y))) composite_img <- image_composite(composite_img, income_cell_bordered, offset = paste0("+", cell_x + col_width, "+", round(row_y))) row_y <- row_y + data_row_height}#... 
BAR CHART OVERLAY WITH CONTROLS ===============================>>># --- Adjustable Variables ---bar_chart_height_scale <- 0.75 # Controls vertical compression of bar chart (0 to 1)bar_spacing_ratio <- 0.7 # Controls thickness of bars (height of each bar)bar_chart_offset_x <- 0 # Horizontal adjustment in pixelsbar_chart_offset_y <- 5 # Vertical adjustment in pixelsbar_gap_size <- 1 # Controls vertical spacing between bar groups (not used now)# --- Data Prep ---income_long <- income_data %>% pivot_longer(cols = c("Rent", "Food", "Clothes", "Tax", "Other"), names_to = "name", values_to = "value") %>% filter(!is.na(value), value != 0) %>% mutate(name = toupper(name)) %>% mutate(textcol = ifelse(name == "RENT", "1", "0"))# --- Create spacer factor levels between Classes ---classes <- rev(unique(income_data$Class)) # Reversed order for top-to-bottom inversionspacer_levels <- paste0("SPACER_", seq_len(length(classes) - 1))# Interleave classes and spacer_levels manuallynew_levels <- character(length(classes) + length(spacer_levels))new_levels[c(TRUE, FALSE)] <- classesnew_levels[c(FALSE, TRUE)] <- spacer_levels# Assign Class_spaced with original classes firstincome_long <- income_long %>% mutate(Class_spaced = factor(as.character(Class), levels = classes))# Create spacer rows with zero value for each spacer levelspacer_rows <- data.frame( Class_spaced = factor(spacer_levels, levels = new_levels), name = "SPACER", value = 0, textcol = "0")income_augmented <- bind_rows(income_long, spacer_rows) %>% mutate(Class_spaced = factor(as.character(Class_spaced), levels = new_levels)) %>% arrange(Class_spaced, name != "SPACER")# Prepare a helper dataset for connectorsstack_order <- c("OTHER", "TAX", "CLOTHES", "FOOD", "RENT")connector_data <- income_augmented %>% filter(name != "SPACER") %>% mutate(name = factor(name, levels = stack_order)) %>% group_by(name) %>% mutate(group_index = row_number()) %>% ungroup()# Build the plotincome_plot <- ggplot(income_augmented, aes( x = 
Class_spaced, y = value, fill = factor(name, levels = stack_order), label = paste0(formatC(value, format = "f", digits = 1), "%"))) + geom_bar( stat = "identity", position = "stack", width = bar_spacing_ratio, color = NA, linewidth = 0.2 ) + geom_text( data = subset(income_augmented, name != "SPACER"), aes(color = textcol), position = position_stack(vjust = 0.5), size = 3, show.legend = FALSE ) + coord_flip() + scale_x_discrete(expand = expansion(add = c(0.5, 0.5))) + theme( legend.position = "none", plot.margin = margin(20 + bar_chart_offset_y, 40 + bar_chart_offset_x, 0, 20), plot.background = element_rect(fill = NA, color = NA), panel.background = element_rect(fill = NA, color = NA), legend.background = element_rect(fill = NA), legend.key = element_rect(fill = NA), axis.title = element_blank(), text = element_text(family = "mono"), axis.ticks = element_blank(), axis.line = element_blank(), panel.border = element_blank(), axis.text.y = element_blank(), plot.title = element_blank(), panel.grid = element_blank() ) + scale_fill_manual(values = c("#cbdfbd", "#8e9aaf", "#d78879", "#a08294", "#161213")) + scale_color_manual(values = c("black", "white")) + labs(fill = "", x = "", y = "")# --- Render and Position Chart ---temp_file <- tempfile(fileext = ".png")ggsave(filename = temp_file, plot = income_plot, width = 8, height = 5, dpi = 144, bg = "transparent")bar_chart_img <- image_read(temp_file)bar_chart_width <- cell_width * 5.5bar_chart_height <- cell_height * 4 * bar_chart_height_scale # Scale heightbar_chart_resized <- image_scale(bar_chart_img, paste0(bar_chart_width, "x", bar_chart_height, "!"))bar_chart_x <- grid_left + cell_width + bar_chart_offset_x - 50bar_chart_y <- grid_top + cell_height + bar_chart_offset_y# ... 
render images on top of each other..composite_img <- image_composite( composite_img, bar_chart_resized, offset = paste0("+", round(bar_chart_x), "+", round(bar_chart_y)))# === Add Right-Side Vertical Class Labels ===# Define the labels and how many bar rows they spanright_labels <- c("POOR", "FAIR", "COMFORTABLE", "WELL-TO-DO")row_counts <- c(2, 2, 2, 1)# Total rows = 7 actual class rows (no spacers included)total_rows <- sum(row_counts)# Compute height per row (based on the bar chart height)row_height <- bar_chart_height / total_rows# Starting Y for the bottom-most labellabel_start_y <- bar_chart_y# Label X position (to the right of bar chart)label_x <- bar_chart_x + bar_chart_width - 150 # Add paddingshift_down_pixels <- 10 # adjust as you want (positive = move down)# -for (i in seq_along(right_labels)) { label_text <- right_labels[i] rows_span <- row_counts[i] label_height <- row_height * rows_span label_y <- 5 + label_start_y + (row_height * (sum(row_counts[1:(i-1)]))) + (label_height / 2) # Move POOR up 2 rows (your existing adjustment) if (label_text == "POOR") { label_y <- label_y - 2 * row_height } # NEW: move POOR and FAIR *down* by shift_down_pixels if (label_text %in% c("POOR", "FAIR")) { label_y <- label_y + shift_down_pixels } # Create and annotate image label_img <- image_blank(width = 30, height = label_height, color = "none") label_img <- image_annotate( label_img, text = label_text, size = 11, gravity = "center", font = "IM FELL English SC", weight = 300, color = "#000000DD", degrees = 270 # Vertical text (bottom to top) ) #label_img <- image_trim(label_img) # <-- removes padding # ➕ Create brace image brace_img <- image_blank(width = 40, height = label_height, color = "none") brace_img <- image_annotate( brace_img, text = "}", # curly brace size = label_height * 0.9, # scale size to span rows gravity = "center", font = "Times", # or another serif font with a clear brace color = "#000000AA" ) # ➕ Combine brace and label horizontally combined_img <- 
image_append(c(brace_img, label_img)) # 🧷 Composite onto final image composite_img <- image_composite( composite_img, combined_img, offset = paste0("+", round(label_x+70), "+", round(label_y - label_height / 2)) )}#======================================>>>># Draw the grid and requested horizontal lines inside the left tablefinal_img <- image_draw(composite_img)# Thickness of cell border lines (in pixels)line_thickness <- 1# x positions: start and end of left table (two columns combined)x_start <- cell_xx_end <- cell_x + 2.8 * col_width# y positions for horizontal lines, adjustable for rows 1 to 7y_positions <- numeric(8) # 7 rows + header# row_1 (header row top)y_positions[1] <- table_top_y# row_2 to row_8 (header + 7 rows total)for (r in 2:8) { if (r == 2) { y_positions[r] <- table_top_y + header_row_height } else { y_positions[r] <- y_positions[r - 1] + data_row_height }}# Draw thin black horizontal arrows at each row boundary (except header)for (y in y_positions[-1]) { arrows( x0 = x_start, y0 = y, x1 = x_end, y1 = y, col = "black", lwd = line_thickness, length = 0.08, angle = 20, code = 2 )}# Add one extra arrow extending off the edge under last data rowy_adjust <- 69arrows( x0 = cell_x + 2 * col_width, y0 = y_positions[8] + y_adjust, x1 = cell_x + 3 * col_width - 15, y1 = y_positions[8] + y_adjust, col = "black", lwd = line_thickness, length = 0.08, angle = 20, code = 2)dev.off() # finish image_draw and save to final_img# Replace composite_img with final_img (arrows added)composite_img <- final_img#=========== OUTPUT FILE ============# Write outputinvisible(image_write(composite_img, path = "output/final_composite.png"))invisible()# Optionally display the image#print(composite_img)# Embed it explicitly into HTMLcat('<img src="output/final_composite.png" style="width:100%;">')#knitr::include_graphics("output/final_composite.png")```<div class="note-box">A recreation of 'Income and Expenditure of 150 Negro Families in Atlanta, GA, USA'<br>Some re-interpretations 
were taken:<br>- connecting lines between stacked bar chart areas were left off<br>- slightly modified '}' grouping labels were used on the right-hand side of bar charts.<br>- an axis was rendered on the bottom to help visualize scale<br></div>## 2 - COVID survey - interpretation<div class="question-box">Q2 - <b>Interpret what’s occurring in the survey, and discuss any results that go against your intuition.</b><br><b>Overall description:</b> The COVID vaccine survey gathered responses from medical and nursing students across the U.S. regarding their attitudes toward vaccine safety, trust, and recommendations. The visualization arranges responses in a grid, with response variables in columns and explanatory variables (like age, profession, or gender) in rows. Each pane displays the mean Likert score and error bars between the 10th and 90th percentiles for each subgroup, offering insight into both central tendency and variability. The top row summarizes overall distributions, unconditioned by explanatory factors.<br><b>-</b><br><b>Interesting Trends in the Data:</b><br><b>1. <i>Trust and Profession:</i></b> <br>Medical students displayed more variability in their agreement with the statement “I trust the information that I have received about the vaccines” compared to nursing students. While both groups leaned toward agreement, the broader spread among medical students suggests more diverse opinions, possibly reflecting deeper exposure to varying sources of information or a more analytical approach to evaluating it.<br><b>2. <i>Concern About Side Effects and Age:</i></b> <br>Across all age groups, responses to concerns about “safety and side effects” hovered around a neutral average (Likert score ≈ 3), with relatively wide error bars. This indicates uncertainty or ambivalence. However, younger students tended to show slightly higher trust (i.e., lower concern scores), suggesting that age may play a role in perceived vaccine risk.<br><b>3. 
<i>Willingness to Recommend and Vaccination History:</i></b> <br>ARespondents with a history of receiving vaccines overwhelmingly strongly agreed with the statement “I will recommend the vaccine to family, friends, and community members.” This strong consensus highlights how personal experience with vaccines may positively reinforce trust and willingness to advocate for vaccination.Overall, the data reveal meaningful variation in how medical and nursing students interpret the science and safety of COVID-19 vaccines, highlighting the complexity of attitudes even within healthcare education.</div>```{r}#| label: label-me-2#------- no code necessary ..```## 3 - COVID survey - reconstruct<div class="question-box">Q3 ....</div><details><summary>Data Analysis - Q1</summary>```{r}#| label: label-me-3-data-table#| echo: false#| fig-asp: 0.63#| fig-width: 7#| #===========# Question 3#===========# Step 1: Read CSV without headers to inspect structure# - Define "" and "NA" as missingraw_preview <-read_csv("data/covid-survey.csv",col_names =FALSE,na =c("", "NA"),show_col_types =FALSE)# - Step 1a: print the dim of the original df.library(glue)original_dim <-dim(raw_preview)cat(glue("📄 The original data frame (raw_preview) has:\n","- {original_dim[1]} rows\n","- {original_dim[2]} columns\n"))# Step 2: View to determine the row containing real column names#View(raw_preview)# Step 3: Based on inspection, set the correct `skip` value# (assume row 2 is the real header → skip = 1)survey_raw <-read_csv("data/covid-survey.csv",skip =1,na =c("", "NA"),show_col_types =FALSE)# Step 4: Dynamically get column namescol_names <-names(survey_raw)# =====================# Diagnostic Summary# =====================# - Overall % missing values (NA)total_cells <-nrow(survey_raw) *ncol(survey_raw)missing_cells <-sum(is.na(survey_raw))missing_pct_total <-round(100* missing_cells / total_cells, 2)# - % of rows with at least one NArows_with_na <- survey_raw |>filter(if_any(everything(), 
is.na))pct_rows_with_na <-round(100*nrow(rows_with_na) /nrow(survey_raw), 2)row_indices_with_na <-which(apply(survey_raw, 1, function(x) any(is.na(x))))# - Rows with more than one NAna_per_row <- survey_raw |>apply(1, function(x) sum(is.na(x)))rows_with_multiple_na <-which(na_per_row >1)pct_rows_with_multiple_na <-round(100*length(rows_with_multiple_na) /nrow(survey_raw), 2)# - Create diagnostic summary tabledataset_diagnostics <-tibble(Metric =c("Total % of values missing","Percent of rows with ≥1 NA","Row indices with ≥1 NA (first 20)","Percent of rows with >1 NA","Row indices with >1 NA (first 20)" ),Value =c( missing_pct_total, pct_rows_with_na,paste(head(row_indices_with_na, 20), collapse =", "), pct_rows_with_multiple_na,paste(head(rows_with_multiple_na, 20), collapse =", ") ))# ==============================# Quarto-Ready Diagnostic Table# ==============================library(kableExtra)dataset_diagnostics |>kable(caption ="<span style='font-weight:bold; font-size:1.1em;'>Table 1. 
Dataset Missing Value Diagnostics</span>",escape =FALSE ) |>kable_styling(full_width =FALSE,bootstrap_options =c("striped", "hover", "condensed") ) |>row_spec(which(dataset_diagnostics$Metric =="Total % of values missing"),background ="#fff3cd"# Light highlight )# =======================================# Remove rows that are entirely NA except for response_id# =======================================# - Original dataset row countoriginal_row_count <-nrow(survey_raw)# - Detect rows where all columns *except* response_id are NAcols_except_response_id <-setdiff(names(survey_raw), "response_id")rows_to_remove <-which( survey_raw |>select(all_of(cols_except_response_id)) |>apply(1, function(x) all(is.na(x))))# - Remove the rowssurvey_clean <- survey_raw[-rows_to_remove, ]# - Number of rows removedrows_removed <-length(rows_to_remove)# - User alert with clear summarycat( glue::glue("✅ Rows with only `response_id` and all other fields missing have been removed.\n","Original dataset rows: {original_row_count}\n","Rows removed: {rows_removed}\n","Cleaned dataset size: {nrow(survey_clean)} rows × {ncol(survey_clean)} columns\n" ))# - Print row numbers removed in a 4-column layoutif (rows_removed >0) {library(knitr) formatted_rows <-paste0("row:", rows_to_remove) padded_length <-ceiling(length(formatted_rows) /4) *4 formatted_rows <-c(formatted_rows, rep("", padded_length -length(formatted_rows))) removed_matrix <-matrix(formatted_rows, ncol =4, byrow =TRUE)cat("\n\n**Rows_Removed**\n")kable(removed_matrix, col.names =NULL, align ="l")}```</details>```{r}#| label: label-me-3a# - Step 1a: print the dim of the original df.original_dim <-dim(raw_preview)cat(glue("📄 The original data frame (`raw_preview`) has:\n","- {original_dim[1]} rows\n","- {original_dim[2]} columns\n\n","⚠️ Rows with no available data (i.e., only `response_id` present)\n will be removed in preprocessing.\n","\n✅ **New Dimensions** of `survey_clean` after cleaning:\n","📊 Rows: {nrow(survey_clean)}\n","📐 
Columns: {ncol(survey_clean)}\n"))#-- ... --- based on info in pdf file and .csv .. encode the following# exp_profession........... # exp_flu_vax.............. # exp_gender............... Q2 What is your gender? # exp_race................. Q3 What is your race? # exp_ethnicity............ Q4 What is your ethnicity? # exp_age_bin.............. Q1 What is your age? # exp_already_vax.......... # resp_safety.............. Q26 Based on my understanding, I believe the vaccine is safe. # resp_confidence_science.. Q34 I am confident in the scientific vetting process for the new COVID vaccines. # resp_concern_safety...... Q27 I am concerned about the safety and side effects of the vaccine. # resp_feel_safe_at_work... Q28 Getting the vaccine will make me feel safer at work. # resp_will_recommend...... Q29 I will recommend the vaccine to family, friends, and community members. # resp_trust_info.......... Q31 I trust the information that I have received about the COVID-19 vaccines.covid_survey_longer <- survey_clean |>pivot_longer(cols =starts_with("exp_"),names_to ="explanatory",values_to ="explanatory_value" ) |>mutate(explanatory_value =as.factor(explanatory_value)) |>filter(!is.na(explanatory_value)) |>pivot_longer(cols =starts_with("resp_"),names_to ="response",values_to ="response_value" )print(covid_survey_longer)```<div class="question-box"><b>Q3 code explanation:</b><br><div class="code-container"><code>covid_survey_longer <- covid_survey |> pivot_longer( cols = starts_with("exp_"), names_to = "explanatory", values_to = "explanatory_value" ) |> filter(!is.na(explanatory_value)) |> pivot_longer( cols = starts_with("resp_"), names_to = "response", values_to = "response_value" )</code></div><br> <b>first pivot_longer():</b><br> Converts all columns that start with "exp_" (e.g., exp_profession, exp_gender, etc.) 
from wide format into long format.<br>
Creates two new columns:<br>
explanatory: holds the original column names (like "exp_profession")<br>
explanatory_value: holds the actual values from those columns (like "Nursing" or "1")<br>
<b>second pivot_longer():</b><br>
After already pivoting the explanatory variables, this takes the remaining<br>
response variables (resp_safety, resp_confidence_science, etc.) and pivots them long as well.<br>
Creates two new columns:<br>
response: original column name<br>
response_value: corresponding value<br>
</div>

<div class="question-box">create the df/tibble: covid_survey_summary_stats_by_group</div>

```{r}
#| label: label-me-3b

# - group the data by explanatory, explanatory_value, and response, and
#   calculate the following stats on response_value:
#   - mean
#   - low  = 10th percentile
#   - high = 90th percentile
# - name the result covid_survey_summary_stats_by_group
covid_survey_summary_stats_by_group <- covid_survey_longer |>
  group_by(explanatory, explanatory_value, response) |>
  summarise(
    mean = mean(response_value, na.rm = TRUE),
    low = quantile(response_value, probs = 0.10, na.rm = TRUE),
    high = quantile(response_value, probs = 0.90, na.rm = TRUE),
    .groups = "drop"
  )

print(covid_survey_summary_stats_by_group)
# View(covid_survey_summary_stats_by_group)
```

<div class="question-box">create the df/tibble: covid_survey_summary_stats_all</div>

```{r}
#| label: label-me-3c

# Overall ("All") summary across every respondent, grouped by response only.
# (dplyr is already attached via pacman::p_load at the top of the file.)
covid_survey_summary_stats_all <- covid_survey_longer |>
  group_by(response) |>
  summarise(
    mean = mean(response_value, na.rm = TRUE),
    low = quantile(response_value, probs = 0.10, na.rm = TRUE),
    high = quantile(response_value, probs = 0.90, na.rm = TRUE),
    explanatory = "All",
    explanatory_value = factor(""),
    .groups = "drop"
  )

print(covid_survey_summary_stats_all)
# View(covid_survey_summary_stats_all)
```

<div class="question-box">Bind the two df's<br>create the df/tibble: covid_summary_of_stats</div>

```{r}
#| label: label-me-3d

# Get existing levels from grouped data
age_levels <- levels(covid_survey_summary_stats_by_group$explanatory_value)

# Add a new level ("") to represent the 'All' group
age_levels_with_all <- c(age_levels, "")

# Re-create the all-summary with the harmonized factor levels so that
# bind_rows() below does not coerce explanatory_value to character.
covid_survey_summary_stats_all <- covid_survey_longer |>
  group_by(response) |>
  summarise(
    mean = mean(response_value, na.rm = TRUE),
    low = quantile(response_value, probs = 0.10, na.rm = TRUE),
    high = quantile(response_value, probs = 0.90, na.rm = TRUE),
    explanatory = "All",
    explanatory_value = factor("", levels = age_levels_with_all),
    .groups = "drop"
  )

# Ensure grouped summary has the same levels too
covid_survey_summary_stats_by_group$explanatory_value <- factor(
  covid_survey_summary_stats_by_group$explanatory_value,
  levels = age_levels_with_all
)

# Bind them safely now
covid_survey_summary_stats <- bind_rows(
  covid_survey_summary_stats_all,
  covid_survey_summary_stats_by_group
)

print(covid_survey_summary_stats)
```

<div class="question-box">Q3e - recreate plot </div>

```{r}
#| label: label-me-3-e
#| fig.width: 10   # width in inches
#| fig.height: 6   # height in inches

# Labels for rows (explanatory variables), including Gender and Race
explanatory_labels <- c(
  All = "All",
  exp_age_bin = "Age",
  exp_gender = "Gender",
  exp_race = "Race", # Added Race label
  exp_ethnicity = "Ethnicity"
)

# - call formatting for encoded data (helpers defined elsewhere in the file)
covid_age_only <- filter_age_data(covid_survey_summary_stats_by_group)
covid_gender_only <- filter_gender_data(covid_survey_summary_stats_by_group)
covid_race_only <- filter_race_data(covid_survey_summary_stats_by_group)
covid_ethnicity_only <- filter_ethnicity_data(covid_survey_summary_stats_by_group)

# Label mappings for response
response_labels <- c(
  resp_safety = "Vaccine is safe",
  resp_feel_safe_at_work = "Feel safer\n at work",
  resp_concern_safety = "Concern about \nvaccine safety",
  resp_confidence_science = "Confidence in \nscientific vetting",
  resp_trust_info = "Trust in \nvaccine info",
  resp_will_recommend = "Will recommend\nvaccine"
)

# Reorder response factor levels to match response_labels
covid_age_only <- covid_age_only %>%
  mutate(response = factor(response, levels = names(response_labels)))
covid_gender_only <- covid_gender_only %>%
  mutate(response = factor(response, levels = names(response_labels)))

# Exploratory check: view distinct codes used in the exp_ethnicity variable
covid_survey_summary_stats_by_group %>%
  filter(explanatory == "exp_ethnicity") %>%
  mutate(explanatory_value = as.character(explanatory_value)) %>%
  distinct(explanatory_value) %>%
  arrange(explanatory_value)

# Vector controlling heights of each row - one entry per stacked layer
row_heights <- c(
  0.5, # - 'All' row height — adjust as needed
  3,   # - 'exp_age_bin' row height
  3,   # - 'exp_gender' row height - adjust as desired
  3,   # - 'exp_race' row height - new Race row
  3    # - ethnicity
)

# Reorder response factor levels for 'All' layer; drop non-finite stats
covid_all_only <- covid_survey_summary_stats_all %>%
  filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
  mutate(response = factor(response, levels = names(response_labels)))

# - vars for standardizing box size row/col
# Define variables for strip appearance
strip_fill_color <- "gray90"
strip_text_color <- "black"
strip_text_size <- 10
strip_text_face <- "plain"
strip_text_angle_x <- 0
strip_text_angle_y <- 0
strip_text_vjust_y <- 0.5
strip_placement <- "outside" # already used in your code

# Build each layer of the composite plot
g0 <- plot_all(covid_all_only)
# - second layer - Age
g1 <- plot_age(covid_age_only)
# - third layer - gender
g2 <- plot_gender(covid_gender_only)
# Fourth layer - Race
g3 <- plot_race(covid_race_only)
# Fifth layer: Ethnicity (if present)
g4 <- plot_ethnicity(
  covid_ethnicity_only,
  "Error bars in range from 10th to 90th percentile"
)

# Composite plot with 5 layers stacked (All / Age / Gender / Race / Ethnicity)
composite_plot <- (g0 / g1 / g2 / g3 / g4 +
  plot_layout(heights = row_heights)) &
  theme(plot.margin = margin(0, 0, 0, 0))

print(composite_plot)
```

<div class="note-box">
A reconstruction of the table on COVID-19 opinions from:
<i>The data were collected by Pavan Shah, Giorgio Caturegli, Galen Shi, and Joshua Materi at Johns Hopkins School of 
Medicine.</i><br>
<b>Note:</b> Error bars shown range from 10th-90th percentile.<br>
<b>Note:</b> Not all data was supplied to complete a full reconstruction of the table.<br>
<div class="text-container"><pre>
# based on info in pdf file and .csv, encode the following:
# exp_profession...........
# exp_flu_vax..............
# exp_gender............... Q2 What is your gender?
# exp_race................. Q3 What is your race?
# exp_ethnicity............ Q4 What is your ethnicity?
# exp_age_bin.............. Q1 What is your age?
# exp_already_vax..........
# resp_safety.............. Q26 Based on my understanding, I believe the vaccine is safe.
# resp_confidence_science.. Q34 I am confident in the scientific vetting process for the new COVID vaccines.
# resp_concern_safety...... Q27 I am concerned about the safety and side effects of the vaccine.
# resp_feel_safe_at_work... Q28 Getting the vaccine will make me feel safer at work.
# resp_will_recommend...... Q29 I will recommend the vaccine to family, friends, and community members.
# resp_trust_info.......... Q31 I trust the information that I have received about the COVID-19 vaccines.
</pre></div>
</div>

## 4 - COVID survey - re-reconstruct

<div class="question-box">Q4 .... Make the plot from Q3, but use different end-point quantiles.<br>
When the error bars represent the 25th and 75th percentiles instead of the 10th and 90th, the intervals become narrower, reflecting a tighter range around the median of the data. This change reduces the apparent variability and uncertainty in responses. Compared to the previous plot, the shorter error bars may make the group differences appear more precise but potentially understate the true variability. 
Therefore, while the overall trends remain similar, conclusions about the degree of uncertainty should be adjusted to recognize that the interquartile range excludes more extreme values.<br>
</div>

```{r,echo=FALSE}
#| label: label-me-4
#| fig.width: 10   # width in inches
#| fig.height: 8   # height in inches

# - set quartile variables
left_quartile <- 0.25
right_quartile <- 0.75

# ... repeat the steps necessary to get the quartile data.
covid_survey_summary_stats_by_group <- covid_survey_longer |>
  group_by(explanatory, explanatory_value, response) |>
  summarise(
    mean = mean(response_value, na.rm = TRUE),
    low = quantile(response_value, probs = left_quartile, na.rm = TRUE),
    high = quantile(response_value, probs = right_quartile, na.rm = TRUE),
    .groups = "drop"
  )

# Create the all-summary with the harmonized factor levels.
# (The original computed this twice; the first result was immediately
# overwritten, so only the levels-aware version is kept.)
# `age_levels_with_all` comes from chunk label-me-3d above.
covid_survey_summary_stats_all <- covid_survey_longer |>
  group_by(response) |>
  summarise(
    mean = mean(response_value, na.rm = TRUE),
    low = quantile(response_value, probs = left_quartile, na.rm = TRUE),
    high = quantile(response_value, probs = right_quartile, na.rm = TRUE),
    explanatory = "All",
    explanatory_value = factor("", levels = age_levels_with_all),
    .groups = "drop"
  )

# Ensure grouped summary has the same levels too
covid_survey_summary_stats_by_group$explanatory_value <- factor(
  covid_survey_summary_stats_by_group$explanatory_value,
  levels = age_levels_with_all
)

# Bind them safely now
covid_survey_summary_stats <- bind_rows(
  covid_survey_summary_stats_all,
  covid_survey_summary_stats_by_group
)

print(covid_survey_summary_stats)

# - call encoding formatting...
covid_age_only <- filter_age_data(covid_survey_summary_stats_by_group)
covid_gender_only <- filter_gender_data(covid_survey_summary_stats_by_group)
covid_race_only <- filter_race_data(covid_survey_summary_stats_by_group)
covid_ethnicity_only <- filter_ethnicity_data(covid_survey_summary_stats_by_group)

# BUG FIX: rebuild covid_all_only from the RECOMPUTED all-summary.
# Previously the Q3 object (10th-90th percentiles) was reused here, so the
# 'All' row of this 25th-75th percentile plot showed the wrong error bars.
covid_all_only <- covid_survey_summary_stats_all %>%
  filter(is.finite(mean), is.finite(low), is.finite(high)) %>%
  mutate(response = factor(response, levels = names(response_labels)))

# - call graph makers (one layer per explanatory variable)
g0 <- plot_all(covid_all_only)
# - second layer - Age
g1 <- plot_age(covid_age_only)
# - third layer - gender
g2 <- plot_gender(covid_gender_only)
# Fourth layer - Race
g3 <- plot_race(covid_race_only)
# Fifth layer: Ethnicity (if present)
g4 <- plot_ethnicity(
  covid_ethnicity_only,
  "Error bars in range from 25th to 75th percentile"
)

# Composite plot with 5 layers stacked (All / Age / Gender / Race / Ethnicity)
composite_plot <- (g0 / g1 / g2 / g3 / g4 +
  plot_layout(heights = row_heights)) &
  theme(plot.margin = margin(0, 0, 0, 0))

print(composite_plot)
```

<div class="note-box">
A reconstruction of the table on COVID-19 opinions from:
<i>The data were collected by Pavan Shah, Giorgio Caturegli, Galen Shi, and Joshua Materi at Johns Hopkins School of Medicine.</i><br>
<b>Note:</b> Error bars shown range from 25th-75th percentile.<br>
<b>Note:</b> Not all data was supplied to complete a full reconstruction of the table.<br>
</div>

## 5 - COVID survey - another view

<div class="question-box">
<b>Q5a ....</b><br>
COVID survey - another view. Create two bar charts of the Likert data for the six survey questions in from the plot in Exercise 2. This should be a single plot visualizing the percentages of each possible answer, with different questions on the y-axis. Use an appropriate color scale.<br><br>
<b>a. Create a diverging bar chart. 
Write alt text for your visualization as well.</b>
</div>

```{r}
#| label: label-me-5a
#| fig.width: 10   # width in inches
#| fig.height: 6   # height in inches

# Define response labels
response_labels <- c(
  resp_safety = "Vaccine is safe",
  resp_feel_safe_at_work = "Feel safer\n at work",
  resp_concern_safety = "Concern about \nvaccine safety",
  resp_confidence_science = "Confidence in \nscientific vetting",
  resp_trust_info = "Trust in \nvaccine info",
  resp_will_recommend = "Will recommend\nvaccine"
)

# Compute mean Likert per question (used to center bars around each
# question's own mean).
mean_scores <- covid_survey_longer %>%
  group_by(response) %>%
  summarise(mean_value = mean(response_value, na.rm = TRUE))

# Compute % response per question/response_value, then the centered
# quantity: deviation from the question mean scaled by percent.
# (The original script first built a case_when-based centered_percent and
# immediately overwrote it with this version; the dead pass is removed.)
likert_summary <- covid_survey_longer %>%
  group_by(response, response_value) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(response) %>%
  mutate(percent = count / sum(count) * 100) %>%
  ungroup() %>%
  mutate(response_value = as.numeric(as.character(response_value))) %>%
  left_join(mean_scores, by = "response") %>%
  mutate(
    deviation = response_value - mean_value,
    centered_percent = deviation * percent
  )

summary(likert_summary)

# Diverging bar chart (centered around mean), with descriptive y-axis labels
g5 <- ggplot(
  likert_summary,
  aes(
    x = centered_percent,
    y = fct_rev(fct_relabel(factor(response), ~ response_labels[.x])),
    fill = factor(response_value)
  )
) +
  geom_bar(stat = "identity", width = 0.7) +
  scale_fill_manual(
    values = c("1" = "#d73027", "2" = "#fc8d59", "3" = "#ffffbf", "4" = "#91bfdb", "5" = "#4575b4"),
    name = "Likert response",
    labels = c("Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree")
  ) +
  labs(
    title = "Diverging Bar Chart of Likert Responses Centered Around Question Mean",
    x = "Deviation × Percentage",
    y = "Survey question"
  ) +
  theme_minimal()

print(g5)
```

<div class="note-box">
<b>Q5a ....alt text description</b><br>
Alt text: A diverging bar chart visualizes Likert-scale responses to six COVID-19 vaccine-related statements. Each statement is represented by a horizontal bar segmented by response categories: strongly disagree, disagree, neutral, agree, strongly agree, and NA, each in a distinct color. The x-axis shows a centered scale ranging from -60 to 60, indicating deviation from the mean response, scaled by percent. Statements include concerns about safety, trust in vaccine information, and willingness to recommend vaccination. The chart reveals variation in sentiment and notable asymmetries across response patterns.
</div>

<div class="question-box">
<b>Q5b ....</b><br>
COVID survey - another view. Create two bar charts of the Likert data for the six survey questions in from the plot in Exercise 2. This should be a single plot visualizing the percentages of each possible answer, with different questions on the y-axis. Use an appropriate color scale.<br>
<b>b. 
Create a 100% bar chart</b><br>
<b>Write alt text for your visualization as well.</b>
</div>

```{r}
#| label: label-me-5b
#| fig.width: 10   # width in inches
#| fig.height: 6   # height in inches

# Ensure response_value is a factor (1-5) for the 100% chart; it was
# converted to numeric in the previous chunk.
likert_summary <- likert_summary %>%
  mutate(response_value = factor(response_value, levels = 1:5))

# 100% stacked bar chart, with descriptive y-axis labels.
# Percents sum to 100 within each question, so stacking `percent`
# directly yields a 100% bar per question.
g100 <- ggplot(
  likert_summary,
  aes(
    x = percent,
    y = fct_rev(fct_relabel(factor(response), ~ response_labels[.x])),
    fill = response_value
  )
) +
  geom_bar(stat = "identity", width = 0.7) +
  scale_fill_manual(
    values = c("1" = "#d73027", "2" = "#fc8d59", "3" = "#ffffbf", "4" = "#91bfdb", "5" = "#4575b4"),
    name = "Response",
    labels = c("Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree")
  ) +
  labs(
    title = "100% Stacked Bar Chart of Likert Responses",
    x = "Percentage of responses",
    y = "Survey question"
  ) +
  theme_minimal()

print(g100)
```

<div class="note-box">
<b>Q5b ....alt text description</b><br>
Alt text: A 100% stacked bar chart displays Likert-scale survey responses to six statements about COVID-19 vaccines. Each bar corresponds to a different statement, such as “Concern about vaccine safety” and “Will recommend vaccine.” The bars are divided into color-coded segments representing response options: Strongly disagree (red), Disagree (orange), Neutral (pale yellow), Agree (light blue), Strongly agree (dark blue), and NA (gray). The x-axis shows percentage values from 0 to 100%, allowing for comparison of relative agreement across the questions. The chart highlights variation in public sentiment—some questions skew toward strong agreement, while others show more mixed or polarized responses.
</div>